/*==============================================================================
FILE NAME: Create_industry_complaints.do
INPUTS: facility_characteristics.dta, incidents.dta, 
OUTPUTS: Industry/industry `i' complaints.dta, industry_air_complaints.dta
CREATED: 27 September 2023
UPDATED: 23 July 2025
==============================================================================*/

//create industry-by-month panel of complaints
set more off

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data"  // Define global paths for replication package
} 
*/

cap mkdir "$processed_data/Industry"

//create industry 
use "$processed_data/facility_characteristics.dta", clear
keep RegulatedEntityNo SIC
rename RegulatedEntityNo RegulatedEntity
gen SIC_2digit = substr(SIC,1,2)
destring SIC_2digit, replace
replace SIC_2digit=. if SIC_2digit==99
drop if SIC_2digit==.
drop SIC 
duplicates drop
sort RegulatedEntity SIC_2digit
by RegulatedEntity: gen nobs=_N
by RegulatedEntity: gen ind=_n
tab nobs
//81% of RNs have only one 2-digit SIC code
drop nobs
tab SIC_2digit
reshape wide SIC_2digit, i(RegulatedEntity) j(ind)
sort RegulatedEntity 
save "$processed_data/SIC_codes_temp.dta", replace

//assign complaints to industries and then create monthly count of complaints by industry
use "$processed_data/incidents.dta", clear
keep if Media=="AIR"
sort RegulatedEntity
merge m:1 RegulatedEntity using "$processed_data/SIC_codes_temp.dta"
drop if _merge==2
keep ComplaintIncident SIC_2digit1-SIC_2digit12 IncidentRecDate IncidentStatus
duplicates drop
isid ComplaintIncident
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate 
rename temp IncidentRecDate
format IncidentRecDate %td
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
tab IncidentStatus
drop if IncidentStatus=="REFERRED"
//15% of complaints are referred outside TCEQ; drop these
drop year month IncidentRecDate ComplaintIncident IncidentStatus
forvalues i=1(1)97{
	gen ind`i'=1 if SIC_2digit1==`i' | SIC_2digit2==`i' | SIC_2digit3==`i' | SIC_2digit4==`i' | SIC_2digit5==`i' | SIC_2digit6==`i' | SIC_2digit7==`i' | SIC_2digit8==`i' | SIC_2digit9==`i' | SIC_2digit10==`i' | SIC_2digit11==`i' | SIC_2digit12==`i' 
}
drop SIC_2digit1-SIC_2digit12
save "$processed_data/ind_temp.dta", replace
clear
foreach i of numlist 1 2 7/10 12/17 20/55 57/59 65 67 70 72 73 75 76 79/80 82 86/89 91 92 95/97{
	use "$processed_data/ind_temp.dta"
	keep ind`i' mdate 
	keep if ind`i'==1
	drop ind`i'
	gen ind=`i'
	gen complaint=1
	sort ind mdate
	collapse (sum) complaint, by(ind mdate)
	rename complaint ind_complaint
	label var ind_complaint "# air complaints in ind-month"
	sort ind mdate
	save "$processed_data/Industry/industry `i' complaints.dta", replace
}
clear 
//change directory to easily append all industry files
cd "$processed_data/Industry"
append using `: dir . files "*.dta"', keep(mdate ind ind_complaint)

//change directory back
cd "$processed_data"
xtset ind mdate
tsfill, full
replace ind_complaint= 0 if ind_complaint==.
rename ind_complaint ind_air_complaint
rename ind SIC_2digit
sort SIC_2digit mdate
save "$processed_data/industry_air_complaints.dta", replace

erase "$processed_data/ind_temp.dta"
erase "$processed_data/SIC_codes_temp.dta"

//reset directory to continue the data prep process
cd "$code/data_prep_code"
